2021 ONE-Seq Pre-Cleaved Analysis Report

library(tidyverse)
library(here)
library(plotly)
library(scales)
library(tidyr)
library(kableExtra)
library(tibble)
## force full digit notation
options(scipen=999)

Please see the accompanying report, “2021_ONEseq_Metadata_Report”, for more information.

Vikram’s standalone QC script produces the following results:

  1. prefix.csv: read counts for each library member
  2. prefix.pdf: histogram of the coverage of the library members
  3. prefix.txt: some summary statistics on how many reads passed my filters

Using results from Vikram’s QC script determine the following:

  • Number of rows with 0 reads divided by the total library size; this should equal the percentage that Vikram’s QC script reports. Store these rows in a df.
  • bar plot of sequences with 0 reads
  • Calculate average, stdev, 95 CI
  • Outside of 95% interval, above and below: How many and what are their read counts? Keep these rows stored in a variable.

Compare to analysis:

  • Anything within the CI range, treat it equally. Anything outside of it, if they are in the analysis output, pull those (above & below), how do read counts look?
  • What is the expected average read count and on-target read count for the sample and range for the stdev.

Samantha’s Stats spreadsheet

Sample Information

All run bioinformatically by Sierra

  • SeQure: SeQure’s wet lab + sequencing of EMX1, FANCF, RNF2 (JJN01m)
  • NIST-R1: NIST wet lab + sequencing of EMX1, FANCF, RNF2 (NNN02m - pre-triplicate split)
  • NIST-R2: NIST wet lab + sequencing of EMX1, FANCF, RNF2 (NNN06m - pre-triplicate split) Technically R2’ since R2 failed sequencing

Defined Functions

All functions used in this report are defined here.

###############################################
## BAR PLOT FUNCTIONS
###############################################

## Dodged bar chart of the number of rows per target site, one bar per lab.
##
## df: data frame with columns lab, target_site and
##     candidate_off_target_site. No filtering happens here, so the caller
##     is expected to pass only the zero-read sequences.
## Returns the plotly object produced by ggplotly().
## Fill colours come from the global `cbPalette`.
make_zero_read_qc_reads_plot <- function(df) {
  ## one row per lab / target site, with the row count in `n`
  zero_counts <- count(
    select(df, lab, target_site, candidate_off_target_site),
    lab, target_site
  )

  bars <- geom_bar(
    aes(x = target_site, y = n, fill = lab),
    stat = "identity",
    position = position_dodge(preserve = "single")
  )

  zero_plot <- ggplot(zero_counts) +
    bars +
    scale_y_continuous(labels = comma) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(
      x = "Target Site",
      y = "Number of Sequences with 0 reads",
      fill = "Lab"
    ) +
    scale_fill_manual(values = cbPalette)

  ## convert the static ggplot into an interactive plotly widget
  ggplotly(zero_plot)
}

## Dodged bar chart of how many barcodes reach a minimum read count,
## per target site and lab.
##
## df:               data frame with columns lab, target_site,
##                   candidate_off_target_site and post_selection_read_count.
## read_count_value: single number; rows with
##                   post_selection_read_count >= this value are counted.
## Returns the plotly object produced by ggplotly().
## Fill colours come from the global `cbPalette`.
make_read_cutoff_bar_plot <- function(df, read_count_value) {
  ## a vector threshold would be recycled row-wise and give several titles
  stopifnot(is.numeric(read_count_value), length(read_count_value) == 1)

  ## count rows at or above the threshold; `.env$` guarantees the argument
  ## is used even if df ever has a column called read_count_value
  plot_df <- df %>%
    filter(post_selection_read_count >= .env$read_count_value) %>%
    select(lab, target_site, candidate_off_target_site) %>%
    count(lab, target_site)

  ## plain value: the `{{ }}` embrace operator has no meaning inside paste()
  plot_title <- paste("Number of Barcodes with at least", read_count_value, "Reads")

  ## plot
  ggplotly(ggplot(plot_df) +
    geom_bar(aes(x = target_site, y = n, fill = lab),
             position = position_dodge(preserve = "single"), stat = "identity") +
    scale_y_continuous(labels = comma) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(x = "Target Site", y = "Count",
         fill = "Lab", title = plot_title) +
    scale_fill_manual(values = cbPalette))
}

## Bar chart, one facet per lab, of how many barcodes fall in each
## CI_range category for every target site.
##
## df: data frame with columns post_selection_read_count,
##     candidate_off_target_site, lab, target_site and CI_range.
## Returns a plotly object with a horizontal legend placed below the plot.
## Fill colours come from the global `cbPalette`.
make_outlier_count_plots <- function(df) {
  ## tally barcodes per target site / lab / CI_range
  barcode_counts <- count(
    select(df, post_selection_read_count, candidate_off_target_site, lab,
           target_site, CI_range),
    target_site, lab, CI_range,
    name = "num_barcodes"
  )

  outlier_bars <- geom_bar(
    aes(x = target_site, y = num_barcodes, fill = CI_range),
    stat = "identity",
    position = position_dodge(preserve = "single")
  )

  static_plot <- ggplot(barcode_counts) +
    outlier_bars +
    scale_y_continuous(labels = comma) +
    ## one panel per lab, each with its own y axis
    facet_wrap(~lab, scales = "free_y") +
    theme_bw() +
    theme(axis.text.x = element_text(hjust = 0)) +
    labs(
      x = "Target Site",
      y = "Number of Barcodes",
      fill = "95% CI",
      title = "Number of Barcodes Above and Below 95% Confidence Interval"
    ) +
    scale_fill_manual(values = cbPalette)

  interactive_plot <- ggplotly(static_plot)
  layout(interactive_plot,
         legend = list(orientation = "h", x = 0.4, y = -0.2))
}

## Bar chart, one facet per lab, of how many outlier barcodes occur at each
## read count for a single target site.
##
## df:        pre-tallied data frame with columns target_site, lab,
##            CI_range, post_selection_read_count and n (the tally).
## targ_site: single string, the target_site value to keep.
## Returns a plotly object with a horizontal legend placed below the plot.
## Fill colours come from the global `cbPalette`.
make_outlier_read_count_plot <- function(df, targ_site) {
  ## `.env$` compares against the VALUE of the argument. The former
  ## `{{targ_site}}` embraced the caller's expression, so a caller variable
  ## named like a column (e.g. `lab`) would have been looked up in df.
  targ_site_plot_df <- df %>%
    filter(target_site == .env$targ_site)

  ggplotly(ggplot(targ_site_plot_df) +
    geom_bar(aes(x = post_selection_read_count,
                 y = n,
                 fill = CI_range),
             position = position_dodge(preserve = "single"),
             stat = "identity") +
    scale_y_continuous(labels = comma) +
    ## one panel per lab, each with its own y axis
    facet_wrap(~lab, scales = "free_y") +
    theme_bw() +
    theme(axis.text.x = element_text(hjust = 0)) +
    labs(x = "Read Count", y = "Count", fill = "95% CI",
         title = "Number of Barcodes") +
    scale_fill_manual(values = cbPalette)) %>%
    layout(legend = list(orientation = "h", x = 0.4, y = -0.2))
}

## Styled HTML table of outlier barcode tallies per read count for a single
## target site, with one column each for "above" and "below".
##
## df:        pre-tallied data frame with columns target_site, lab,
##            CI_range, post_selection_read_count and n (the tally).
## targ_site: single string, the target_site value to keep.
## Returns the kableExtra table object.
## NOTE(review): rename() expects both an "above" and a "below" category to
## be present for the chosen site; it fails if one is missing.
make_outlier_read_count_table <- function(df, targ_site) {
  df %>%
    ## `.env$` compares against the VALUE of the argument rather than
    ## embracing the caller's expression
    filter(target_site == .env$targ_site) %>%
    ## spread CI_range into columns; absent combinations become 0
    pivot_wider(names_from = "CI_range", values_from = "n", values_fill = 0) %>%
    rename(Number.Barcodes.Above.95CI = above,
           Number.Barcodes.Below.95CI = below,
           Target.Site = target_site,
           Lab = lab,
           Read.Count = post_selection_read_count) %>%
    arrange(Read.Count) %>%
    kbl() %>%
    ## html_font is an argument of kable_styling(); inside c() it was
    ## dropped as a non-matching bootstrap option and never applied
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                  html_font = "Cambria") %>%
    kable_styling(font_size = 10)
}

Load Data

NOTE: the .txt files need to be altered manually in the following ways before they can be loaded here:

  1. Paste the following template into a .tsv file:
  • reads_processed n
  • reads_with_constant_seqs n
  • reads_with_identical_left-right_barcodes n
  • reads_with_expected_candidate_sites n
  • pct_lib_members_not_present n
  • library_uniformity_90-10 n
  2. Replace every n with the appropriate value from the script output file.

All file loading & munging is coded here.

########################################################################
#  READ COUNT SUMMARIES
########################################################################
## get list of read count files in results directory; the dot is escaped
## and the pattern anchored so only names ending in "QC.csv" are matched
QC_reads_file_list <- as.list(list.files(path = here("data/Vikram_QC/report_input"), 
                           pattern = "QC\\.csv$",
                           include.dirs = TRUE, full.names = TRUE, 
                           recursive = TRUE))

## strip the path portion of the file names; fixed() stops characters in
## the directory path from being interpreted as a regular expression
QC_reads_file_names <- str_remove(QC_reads_file_list, 
                                  fixed(paste0(here("data/Vikram_QC/report_input"), "/")))

## organize the file list to read them into a df
QC_reads_lst <- set_names(QC_reads_file_list, QC_reads_file_names)

## make lists into a data frame: the first two "_"-separated pieces of the
## file name are taken as lab and target site, later pieces are dropped
## explicitly (previously dropped with a warning). The suffix pattern now
## targets the .csv files this section reads (it used to say "QC.tsv").
QC_reads_metadata_df <- tibble(QC_reads_file = unlist(QC_reads_file_names)) %>% 
  separate(QC_reads_file, c("lab", "target_site"), 
           sep = "_", remove = FALSE, extra = "drop") %>%
  mutate(target_site = str_remove(target_site, "[._]?QC\\.csv$"))

## read in files & annotate with the metadata; .id stores the (path-stripped)
## file name of each row so it can be joined to the metadata
QC_reads_df <- QC_reads_lst %>%
  map_dfr(read_csv, .id = "QC_reads_file") %>% 
  left_join(QC_reads_metadata_df) 

## create a df that holds lib size info
target_site <- c('EMX1', 'FANCF', 'RNF2') 
lib_size <- c(51743, 29207, 26102) ## num rows of lib.txt files
lib_size_df <- data.frame(target_site, lib_size)

## attach the library size of its target site to every row
QC_reads_df <- QC_reads_df %>% 
  left_join(lib_size_df)

########################################################################
# SUMMARY OUTPUT
########################################################################

## get list of summary files in results directory; the dot is escaped and
## the pattern anchored so only names ending in "QC.tsv" are matched
qc_file_list <- as.list(list.files(path = here("data/Vikram_QC/report_input"),
                           pattern = "QC\\.tsv$",
                           include.dirs = TRUE, full.names = TRUE,
                           recursive = TRUE))

## strip the path portion of the file names; fixed() stops characters in
## the directory path from being interpreted as a regular expression
qc_file_names <- str_remove(qc_file_list,
                            fixed(paste0(here("data/Vikram_QC/report_input"), "/")))

## organize the file list to read them into a df
qc_df_lst <- set_names(qc_file_list, qc_file_names)

## make lists into a data frame: the first two "_"-separated pieces of the
## file name are taken as lab and target site, later pieces are dropped
## explicitly. The suffix pattern was "QC.sv", which can never match.
qc_metadata_df <- tibble(qc_file = unlist(qc_file_names)) %>%
  separate(qc_file, c("lab", "target_site"),
           sep = "_", remove = FALSE, extra = "drop") %>%
  mutate(target_site = str_remove(target_site, "[._]?QC\\.tsv$"))

## read in the two-column (name, value) files & derive lab / target site
## from the file name stored in `sample`
qc_df <- qc_df_lst %>%
  map_df(read_tsv, col_names = c("var", "number_of_seqs"), .id = "sample") %>%
  mutate(var = str_remove(var, "number_of_")) %>%
  separate(sample, c("lab", "target_site"), sep = "_", remove = FALSE,
           extra = "drop") %>%
  select(-sample) %>%
  ## was ".QC.sv", which can never match a "QC.tsv" suffix
  mutate(target_site = str_remove(target_site, "[._]?QC\\.tsv$"))

########################################################################
# COLOR PALETTE 
########################################################################
## Sixteen fill colours shared by every plot in this report; they are
## handed to scale_fill_manual() in the plotting functions, which use them
## in the order listed here.
cbPalette <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7",
  "#999999", "#20F03B", "#FEC44F", "#D95F0E",
  "#756BB1", "#FF8A33", "#D7BE07", "#A807D7"
)

########################################################################
# CLEAR OBJECTS 
########################################################################
## drop the intermediate file lists and metadata frames used during
## loading; only the assembled data frames are needed from here on
rm(
  QC_reads_file_list, QC_reads_file_names, QC_reads_lst,
  qc_file_list, qc_file_names, qc_df_lst,
  QC_reads_metadata_df, qc_metadata_df
)

Sequences with Zero Reads

Investigate how many barcodes are present with 0 sequenced reads pre-cleavage, per target site & lab.

PROCESS

  1. Pull all rows/sequences with zero reads & store in a data frame.

  2. Validate the following calculated results against the script-generated QC file: “x% of library members were not present.” Check via: Number of rows with 0 reads, divide by total library size

  3. Plot quantity of drop-off data across samples.

## df of all zero read sequences
zero_count_df <- QC_reads_df %>%
  filter(post_selection_read_count == 0)

## plot how many zero-read count seqs per target site and lab
make_zero_read_qc_reads_plot(zero_count_df)
###############################################################
## sanity check - calculate % library members not present
## against Vikram's QC script output

## create sanity check df and perform calculations:
## zero-read rows per lab / target site as a percentage of the library size.
## The percentage is plain row-wise arithmetic, so no grouping is needed.
zero_count_sanity_check_df <- zero_count_df %>% 
  select(lab, target_site, candidate_off_target_site) %>% 
  count(lab, target_site) %>% 
  left_join(lib_size_df) %>%
  mutate(pct_lib_mems_not_present = round((n / lib_size) * 100, digits = 3)) %>%
  rename(num_seqs_with_zero_reads = n) 

## compare these values to the values calculated in Vikram's QC table
(zero_count_qc_check <- qc_df %>% 
  filter(var == "pct_lib_members_not_present") %>% 
  pivot_wider(names_from = var, values_from = number_of_seqs, values_fill = NA) %>%
  right_join(zero_count_sanity_check_df) %>% 
  arrange(target_site) %>%
  kbl() %>%
  ## html_font is an argument of kable_styling(); inside c() it was
  ## dropped as a non-matching bootstrap option and never applied
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                html_font = "Cambria") %>% 
  kable_styling(font_size = 10))
lab target_site pct_lib_members_not_present num_seqs_with_zero_reads lib_size pct_lib_mems_not_present
JJN01m EMX1 0.427 221 51743 0.427
NNN02m EMX1 0.044 23 51743 0.044
NNN06m EMX1 0.062 32 51743 0.062
JJN01m FANCF 0.168 49 29207 0.168
NNN02m FANCF 0.041 12 29207 0.041
NNN06m FANCF 0.041 13 29207 0.045
JJN01m RNF2 0.092 24 26102 0.092
NNN02m RNF2 0.008 2 26102 0.008
NNN06m RNF2 0.008 2 26102 0.008

The column “pct_lib_members_not_present” contains the values calculated by Vikram’s QC script. The column “pct_lib_mems_not_present” contains my calculations.

Read Cutoff Plots

Count how many barcodes are present per target site/lab at specific read count thresholds.

At least 6 reads

## plot: barcodes with at least 6 reads per target site and lab
make_read_cutoff_bar_plot(QC_reads_df, 6)
## table of the same counts
(cutoff_count_df <- QC_reads_df %>%
  filter(post_selection_read_count >= 6) %>% 
  select(lab, target_site, candidate_off_target_site) %>% 
  count(lab, target_site) %>% 
  rename(num_seqs_w_at_least_6_reads = n) %>% 
  arrange(target_site) %>%
  kbl() %>%
  ## html_font is an argument of kable_styling(); inside c() it was
  ## dropped as a non-matching bootstrap option and never applied
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                html_font = "Cambria"))
lab target_site num_seqs_w_at_least_6_reads
JJN01m EMX1 47233
NNN02m EMX1 51618
NNN06m EMX1 51421
JJN01m FANCF 29003
NNN02m FANCF 29171
NNN06m FANCF 29175
JJN01m RNF2 26002
NNN02m RNF2 26094
NNN06m RNF2 26079

At least 10 reads

## plot: barcodes with at least 10 reads per target site and lab
make_read_cutoff_bar_plot(QC_reads_df, 10)
## table of the same counts
(cutoff_count_df <- QC_reads_df %>%
  filter(post_selection_read_count >= 10) %>% 
  select(lab, target_site, candidate_off_target_site) %>% 
  count(lab, target_site) %>% 
  rename(num_seqs_w_at_least_10_reads = n) %>% 
  arrange(target_site) %>%
  kbl() %>%
  ## html_font is an argument of kable_styling(); inside c() it was
  ## dropped as a non-matching bootstrap option and never applied
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                html_font = "Cambria"))
lab target_site num_seqs_w_at_least_10_reads
JJN01m EMX1 32531
NNN02m EMX1 51376
NNN06m EMX1 50201
JJN01m FANCF 28404
NNN02m FANCF 29120
NNN06m FANCF 29118
JJN01m RNF2 25884
NNN02m RNF2 26085
NNN06m RNF2 26044

At least 50 reads

## plot: barcodes with at least 50 reads per target site and lab
make_read_cutoff_bar_plot(QC_reads_df, 50)
## table of the same counts; lab / target site combinations with no
## barcode at this depth do not appear
(cutoff_count_df <- QC_reads_df %>%
  filter(post_selection_read_count >= 50) %>% 
  select(lab, target_site, candidate_off_target_site) %>% 
  count(lab, target_site) %>% 
  rename(num_seqs_w_at_least_50_reads = n) %>% 
  arrange(target_site) %>%
  kbl() %>%
  ## html_font is an argument of kable_styling(); inside c() it was
  ## dropped as a non-matching bootstrap option and never applied
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                html_font = "Cambria"))
lab target_site num_seqs_w_at_least_50_reads
NNN02m EMX1 3621
NNN06m EMX1 42
JJN01m FANCF 5
NNN02m FANCF 1096
NNN06m FANCF 2562
JJN01m RNF2 4105
NNN02m RNF2 20217
NNN06m RNF2 14899

At least 100 reads

## plot: barcodes with at least 100 reads per target site and lab
make_read_cutoff_bar_plot(QC_reads_df, 100)
## table of the same counts; lab / target site combinations with no
## barcode at this depth do not appear
(cutoff_count_df <- QC_reads_df %>%
  filter(post_selection_read_count >= 100) %>% 
  select(lab, target_site, candidate_off_target_site) %>% 
  count(lab, target_site) %>% 
  rename(num_seqs_w_at_least_100_reads = n) %>% 
  arrange(target_site) %>%
  kbl() %>%
  ## html_font is an argument of kable_styling(); inside c() it was
  ## dropped as a non-matching bootstrap option and never applied
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                html_font = "Cambria"))
lab target_site num_seqs_w_at_least_100_reads
JJN01m RNF2 2
NNN02m RNF2 204
NNN06m RNF2 52

Average, Standard Deviation, 95% CI

Calculate average, standard deviation, and 95% confidence interval of barcodes per target site/lab

NOTE: Zero-read barcodes are included.

Column descriptions & process for generating values:

  • Average: mean() of post selection read count values for each lab/target site
  • Standard Deviation: sd() of post selection read count values for each lab/target site
  • 2 sigma -: = Average - (2 * Standard Deviation)
  • 2 sigma +: = Average + (2 * Standard Deviation)
  • # Barcodes < 95 CI range: sum() of post_selection_read_count <two_sigma_minus
  • # Barcodes > 95 CI range: sum() of post_selection_read_count > two_sigma_plus
  • % Barcodes < 95 CI range: = num_barcodes_less_95CI / lib_size * 100
  • % Barcodes > 95 CI range: = num_barcodes_more_95CI / lib_size * 100
## create new df with all of these calculations
## (mutate, not summarise: every barcode keeps its own row and carries the
## summary values of its target_site / lab group)
## NOTE(review): stats_df is left grouped by target_site / lab; later
## chunks appear to rely on that, so no ungroup() is added here.
stats_df <- QC_reads_df %>% 
  group_by(target_site, lab) %>% 
  mutate(average_read_count = mean(post_selection_read_count), 
         std_dev_read_count = sd(post_selection_read_count),
         two_sigma_minus = average_read_count - (2 * std_dev_read_count),
         two_sigma_plus = average_read_count + (2 * std_dev_read_count),
         num_barcodes_less_95CI = sum(post_selection_read_count < two_sigma_minus),
         num_barcodes_more_95CI = sum(post_selection_read_count > two_sigma_plus),
         pct_barcodes_less_95CI = paste0(round((num_barcodes_less_95CI / lib_size) * 100, digits = 1), "%"),
         pct_barcodes_more_95CI = paste0(round((num_barcodes_more_95CI / lib_size) * 100, digits = 1), "%")) %>% 
  mutate(CI_range = case_when(
    post_selection_read_count < two_sigma_minus ~ "below",
    post_selection_read_count > two_sigma_plus ~ "above",
    ## inclusive bounds: a count exactly on a limit is "within"; with the
    ## former strict > / < it matched no branch and became NA
    post_selection_read_count >= two_sigma_minus &
      post_selection_read_count <= two_sigma_plus ~ "within"))

## downsize df and show summary values (one row per lab / target site)
stats_df %>% 
  select(lab, target_site, lib_size, average_read_count, std_dev_read_count, 
         two_sigma_minus, two_sigma_plus, num_barcodes_less_95CI, num_barcodes_more_95CI, 
         pct_barcodes_less_95CI, pct_barcodes_more_95CI) %>% 
  distinct() %>% 
  arrange(target_site) %>% 
  kbl() %>%
  ## html_font is an argument of kable_styling(); inside c() it was
  ## dropped as a non-matching bootstrap option and never applied
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                html_font = "Cambria") %>% 
  kable_styling(font_size = 10)
lab target_site lib_size average_read_count std_dev_read_count two_sigma_minus two_sigma_plus num_barcodes_less_95CI num_barcodes_more_95CI pct_barcodes_less_95CI pct_barcodes_more_95CI
JJN01m EMX1 51743 11.21572 4.453146 2.309428 20.12201 729 1392 1.4% 2.7%
NNN02m EMX1 51743 33.69673 10.387803 12.921127 54.47234 833 1486 1.6% 2.9%
NNN06m EMX1 51743 22.64525 7.464018 7.717210 37.57328 732 1629 1.4% 3.1%
JJN01m FANCF 29207 22.14065 6.923309 8.294033 35.98727 574 952 2% 3.3%
NNN02m FANCF 29207 32.01784 9.225183 13.567471 50.46821 372 900 1.3% 3.1%
NNN06m FANCF 29207 35.23436 10.195871 14.842619 55.62610 380 941 1.3% 3.2%
JJN01m RNF2 26102 37.94529 12.068096 13.809100 62.08148 440 743 1.7% 2.8%
NNN02m RNF2 26102 60.84974 15.205493 30.438757 91.26073 553 657 2.1% 2.5%
NNN06m RNF2 26102 52.44479 15.610406 21.223982 83.66560 560 699 2.1% 2.7%

Outside of 95% Confidence Interval

Investigate how many barcodes are outside the calculated 95% CI range across target sites & labs.

PROCESS

  • Bin according to barcodes above & below the CI threshold.
  • How many are present
  • Number of above & below 95% CI barcodes per target site & lab.
  • Plot
## keep only the barcodes outside the interval ("above" or "below")
outliers_df <- stats_df %>% 
  filter(CI_range != "within")

## plot the number of outlier barcodes per target site, lab and direction
make_outlier_count_plots(outliers_df)

## same counts as a table, one column per direction
(outlier_plot_df <- outliers_df %>% 
    select(post_selection_read_count, candidate_off_target_site, lab, target_site,
           CI_range) %>% 
    count(target_site, lab, CI_range) %>%
    rename(num_barcodes = n) %>% 
    pivot_wider(names_from = "CI_range", values_from = "num_barcodes", values_fill = NA) %>% 
    rename(Number.Barcodes.Above.95CI = above, 
           Number.Barcodes.Below.95CI = below,
           Target.Site = target_site,
           Lab = lab) %>% 
    kbl() %>%
    ## html_font is an argument of kable_styling(); inside c() it was
    ## dropped as a non-matching bootstrap option and never applied
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                  html_font = "Cambria"))
Target.Site Lab Number.Barcodes.Above.95CI Number.Barcodes.Below.95CI
EMX1 JJN01m 1392 729
EMX1 NNN02m 1486 833
EMX1 NNN06m 1629 732
FANCF JJN01m 952 574
FANCF NNN02m 900 372
FANCF NNN06m 941 380
RNF2 JJN01m 743 440
RNF2 NNN02m 657 553
RNF2 NNN06m 699 560

Read counts of barcodes outside of 95% CI

Look at how many barcodes outside of the 95% confidence interval occur at specific read count values.

PROCESS

  1. Separate by target site
  2. Count how many barcodes exist at each read count value above & below CI range
  3. Plot
## Count, for every lab / target site / CI_range, how many outlier barcodes
## share each post-selection read count (count() stores the tally in `n`).
## NOTE(review): outliers_df appears to still carry the target_site / lab
## grouping set when stats_df was built, which may influence the column
## order of the result; confirm before ungrouping upstream.
outlier_readcount_plot_df <- outliers_df %>% 
  select(post_selection_read_count, lab, target_site, CI_range) %>% 
  count(lab, target_site, CI_range, post_selection_read_count) 

EMX1

## EMX1: interactive bar chart, then the matching table, of outlier
## barcodes per read count
make_outlier_read_count_plot(df = outlier_readcount_plot_df, targ_site = "EMX1")
make_outlier_read_count_table(df = outlier_readcount_plot_df, targ_site = "EMX1")
Target.Site Lab Read.Count Number.Barcodes.Above.95CI Number.Barcodes.Below.95CI
EMX1 JJN01m 0 0 221
EMX1 NNN02m 0 0 23
EMX1 NNN06m 0 0 32
EMX1 JJN01m 1 0 161
EMX1 NNN02m 1 0 11
EMX1 NNN06m 1 0 28
EMX1 JJN01m 2 0 347
EMX1 NNN02m 2 0 13
EMX1 NNN06m 2 0 35
EMX1 NNN02m 3 0 23
EMX1 NNN06m 3 0 52
EMX1 NNN02m 4 0 23
EMX1 NNN06m 4 0 71
EMX1 NNN02m 5 0 32
EMX1 NNN06m 5 0 104
EMX1 NNN02m 6 0 40
EMX1 NNN06m 6 0 171
EMX1 NNN02m 7 0 52
EMX1 NNN06m 7 0 239
EMX1 NNN02m 8 0 62
EMX1 NNN02m 9 0 88
EMX1 NNN02m 10 0 119
EMX1 NNN02m 11 0 158
EMX1 NNN02m 12 0 189
EMX1 JJN01m 21 475 0
EMX1 JJN01m 22 320 0
EMX1 JJN01m 23 223 0
EMX1 JJN01m 24 143 0
EMX1 JJN01m 25 86 0
EMX1 JJN01m 26 54 0
EMX1 JJN01m 27 36 0
EMX1 JJN01m 28 18 0
EMX1 JJN01m 29 15 0
EMX1 JJN01m 30 10 0
EMX1 JJN01m 31 3 0
EMX1 JJN01m 32 4 0
EMX1 JJN01m 33 4 0
EMX1 JJN01m 36 1 0
EMX1 NNN06m 38 378 0
EMX1 NNN06m 39 323 0
EMX1 NNN06m 40 198 0
EMX1 NNN06m 41 165 0
EMX1 NNN06m 42 136 0
EMX1 NNN06m 43 106 0
EMX1 NNN06m 44 82 0
EMX1 NNN06m 45 58 0
EMX1 NNN06m 46 54 0
EMX1 NNN06m 47 48 0
EMX1 NNN06m 48 21 0
EMX1 NNN06m 49 18 0
EMX1 NNN06m 50 8 0
EMX1 NNN06m 51 13 0
EMX1 NNN06m 52 5 0
EMX1 NNN06m 53 6 0
EMX1 NNN06m 54 3 0
EMX1 NNN02m 55 276 0
EMX1 NNN06m 55 3 0
EMX1 NNN02m 56 200 0
EMX1 NNN06m 56 2 0
EMX1 NNN02m 57 177 0
EMX1 NNN06m 57 1 0
EMX1 NNN02m 58 156 0
EMX1 NNN02m 59 137 0
EMX1 NNN02m 60 101 0
EMX1 NNN02m 61 84 0
EMX1 NNN02m 62 55 0
EMX1 NNN06m 62 1 0
EMX1 NNN02m 63 63 0
EMX1 NNN02m 64 50 0
EMX1 NNN02m 65 41 0
EMX1 NNN02m 66 34 0
EMX1 NNN02m 67 25 0
EMX1 NNN02m 68 12 0
EMX1 NNN02m 69 18 0
EMX1 NNN02m 70 8 0
EMX1 NNN02m 71 6 0
EMX1 NNN02m 72 14 0
EMX1 NNN02m 73 6 0
EMX1 NNN02m 74 4 0
EMX1 NNN02m 75 4 0
EMX1 NNN02m 76 3 0
EMX1 NNN02m 77 3 0
EMX1 NNN02m 78 3 0
EMX1 NNN02m 79 1 0
EMX1 NNN02m 80 2 0
EMX1 NNN02m 82 2 0
EMX1 NNN02m 85 1 0

FANCF

## FANCF: interactive bar chart, then the matching table, of outlier
## barcodes per read count
make_outlier_read_count_plot(df = outlier_readcount_plot_df, targ_site = "FANCF")
make_outlier_read_count_table(df = outlier_readcount_plot_df, targ_site = "FANCF")
Target.Site Lab Read.Count Number.Barcodes.Above.95CI Number.Barcodes.Below.95CI
FANCF JJN01m 0 0 49
FANCF NNN02m 0 0 12
FANCF NNN06m 0 0 13
FANCF JJN01m 1 0 13
FANCF NNN02m 1 0 6
FANCF NNN06m 1 0 5
FANCF JJN01m 2 0 21
FANCF NNN02m 2 0 6
FANCF NNN06m 2 0 6
FANCF JJN01m 3 0 11
FANCF NNN02m 3 0 4
FANCF NNN06m 3 0 2
FANCF JJN01m 4 0 46
FANCF NNN02m 4 0 5
FANCF NNN06m 4 0 4
FANCF JJN01m 5 0 64
FANCF NNN02m 5 0 3
FANCF NNN06m 5 0 2
FANCF JJN01m 6 0 82
FANCF NNN02m 6 0 8
FANCF NNN06m 6 0 7
FANCF JJN01m 7 0 131
FANCF NNN02m 7 0 11
FANCF NNN06m 7 0 17
FANCF JJN01m 8 0 157
FANCF NNN02m 8 0 16
FANCF NNN06m 8 0 19
FANCF NNN02m 9 0 16
FANCF NNN06m 9 0 14
FANCF NNN02m 10 0 43
FANCF NNN06m 10 0 30
FANCF NNN02m 11 0 58
FANCF NNN06m 11 0 36
FANCF NNN02m 12 0 85
FANCF NNN06m 12 0 53
FANCF NNN02m 13 0 99
FANCF NNN06m 13 0 73
FANCF NNN06m 14 0 99
FANCF JJN01m 36 231 0
FANCF JJN01m 37 182 0
FANCF JJN01m 38 146 0
FANCF JJN01m 39 106 0
FANCF JJN01m 40 73 0
FANCF JJN01m 41 59 0
FANCF JJN01m 42 43 0
FANCF JJN01m 43 45 0
FANCF JJN01m 44 19 0
FANCF JJN01m 45 9 0
FANCF JJN01m 46 14 0
FANCF JJN01m 47 10 0
FANCF JJN01m 48 5 0
FANCF JJN01m 49 5 0
FANCF JJN01m 50 1 0
FANCF NNN02m 51 169 0
FANCF JJN01m 52 2 0
FANCF NNN02m 52 130 0
FANCF JJN01m 53 1 0
FANCF NNN02m 53 106 0
FANCF JJN01m 54 1 0
FANCF NNN02m 54 87 0
FANCF NNN02m 55 83 0
FANCF NNN02m 56 54 0
FANCF NNN06m 56 173 0
FANCF NNN02m 57 55 0
FANCF NNN06m 57 141 0
FANCF NNN02m 58 33 0
FANCF NNN06m 58 101 0
FANCF NNN02m 59 45 0
FANCF NNN06m 59 99 0
FANCF NNN02m 60 35 0
FANCF NNN06m 60 67 0
FANCF NNN02m 61 21 0
FANCF NNN06m 61 73 0
FANCF NNN02m 62 23 0
FANCF NNN06m 62 56 0
FANCF NNN02m 63 9 0
FANCF NNN06m 63 51 0
FANCF NNN02m 64 13 0
FANCF NNN06m 64 34 0
FANCF NNN02m 65 11 0
FANCF NNN06m 65 23 0
FANCF NNN02m 66 10 0
FANCF NNN06m 66 24 0
FANCF NNN02m 67 3 0
FANCF NNN06m 67 23 0
FANCF NNN02m 68 5 0
FANCF NNN06m 68 14 0
FANCF NNN02m 69 6 0
FANCF NNN06m 69 12 0
FANCF NNN02m 70 2 0
FANCF NNN06m 70 12 0
FANCF NNN06m 71 7 0
FANCF NNN06m 72 5 0
FANCF NNN06m 73 5 0
FANCF NNN06m 74 5 0
FANCF NNN06m 75 4 0
FANCF NNN06m 76 5 0
FANCF NNN06m 77 2 0
FANCF NNN06m 78 1 0
FANCF NNN06m 79 1 0
FANCF NNN06m 81 1 0
FANCF NNN06m 82 1 0
FANCF NNN06m 84 1 0

RNF2

## RNF2: interactive bar chart, then the matching table, of outlier
## barcodes per read count
make_outlier_read_count_plot(df = outlier_readcount_plot_df, targ_site = "RNF2")
make_outlier_read_count_table(df = outlier_readcount_plot_df, targ_site = "RNF2")
Target.Site Lab Read.Count Number.Barcodes.Above.95CI Number.Barcodes.Below.95CI
RNF2 JJN01m 0 0 24
RNF2 NNN02m 0 0 2
RNF2 NNN06m 0 0 2
RNF2 JJN01m 1 0 11
RNF2 NNN02m 1 0 1
RNF2 NNN06m 1 0 4
RNF2 JJN01m 2 0 9
RNF2 NNN02m 2 0 1
RNF2 NNN06m 2 0 4
RNF2 JJN01m 3 0 15
RNF2 NNN02m 3 0 1
RNF2 NNN06m 3 0 2
RNF2 JJN01m 4 0 13
RNF2 NNN02m 4 0 2
RNF2 NNN06m 4 0 3
RNF2 JJN01m 5 0 28
RNF2 NNN02m 5 0 1
RNF2 NNN06m 5 0 8
RNF2 JJN01m 6 0 25
RNF2 NNN02m 6 0 1
RNF2 NNN06m 6 0 4
RNF2 JJN01m 7 0 23
RNF2 NNN02m 7 0 1
RNF2 NNN06m 7 0 7
RNF2 JJN01m 8 0 34
RNF2 NNN02m 8 0 4
RNF2 NNN06m 8 0 11
RNF2 JJN01m 9 0 36
RNF2 NNN02m 9 0 3
RNF2 NNN06m 9 0 13
RNF2 JJN01m 10 0 34
RNF2 NNN02m 10 0 3
RNF2 NNN06m 10 0 26
RNF2 JJN01m 11 0 50
RNF2 NNN02m 11 0 5
RNF2 NNN06m 11 0 27
RNF2 JJN01m 12 0 61
RNF2 NNN02m 12 0 8
RNF2 NNN06m 12 0 18
RNF2 JJN01m 13 0 77
RNF2 NNN02m 13 0 10
RNF2 NNN06m 13 0 22
RNF2 NNN02m 14 0 5
RNF2 NNN06m 14 0 26
RNF2 NNN02m 15 0 15
RNF2 NNN06m 15 0 29
RNF2 NNN02m 16 0 8
RNF2 NNN06m 16 0 46
RNF2 NNN02m 17 0 8
RNF2 NNN06m 17 0 43
RNF2 NNN02m 18 0 11
RNF2 NNN06m 18 0 48
RNF2 NNN02m 19 0 14
RNF2 NNN06m 19 0 69
RNF2 NNN02m 20 0 16
RNF2 NNN06m 20 0 72
RNF2 NNN02m 21 0 15
RNF2 NNN06m 21 0 76
RNF2 NNN02m 22 0 22
RNF2 NNN02m 23 0 32
RNF2 NNN02m 24 0 31
RNF2 NNN02m 25 0 45
RNF2 NNN02m 26 0 40
RNF2 NNN02m 27 0 51
RNF2 NNN02m 28 0 56
RNF2 NNN02m 29 0 66
RNF2 NNN02m 30 0 75
RNF2 JJN01m 63 89 0
RNF2 JJN01m 64 80 0
RNF2 JJN01m 65 76 0
RNF2 JJN01m 66 64 0
RNF2 JJN01m 67 67 0
RNF2 JJN01m 68 62 0
RNF2 JJN01m 69 40 0
RNF2 JJN01m 70 37 0
RNF2 JJN01m 71 24 0
RNF2 JJN01m 72 39 0
RNF2 JJN01m 73 32 0
RNF2 JJN01m 74 16 0
RNF2 JJN01m 75 24 0
RNF2 JJN01m 76 12 0
RNF2 JJN01m 77 8 0
RNF2 JJN01m 78 8 0
RNF2 JJN01m 79 9 0
RNF2 JJN01m 80 9 0
RNF2 JJN01m 81 7 0
RNF2 JJN01m 82 6 0
RNF2 JJN01m 83 4 0
RNF2 JJN01m 84 8 0
RNF2 NNN06m 84 81 0
RNF2 JJN01m 85 2 0
RNF2 NNN06m 85 80 0
RNF2 JJN01m 86 5 0
RNF2 NNN06m 86 73 0
RNF2 JJN01m 87 3 0
RNF2 NNN06m 87 68 0
RNF2 JJN01m 88 1 0
RNF2 NNN06m 88 67 0
RNF2 JJN01m 89 1 0
RNF2 NNN06m 89 50 0
RNF2 JJN01m 90 2 0
RNF2 NNN06m 90 26 0
RNF2 JJN01m 91 2 0
RNF2 NNN06m 91 45 0
RNF2 NNN02m 92 73 0
RNF2 NNN06m 92 27 0
RNF2 NNN02m 93 75 0
RNF2 NNN06m 93 30 0
RNF2 NNN02m 94 62 0
RNF2 NNN06m 94 24 0
RNF2 JJN01m 95 1 0
RNF2 NNN02m 95 62 0
RNF2 NNN06m 95 13 0
RNF2 JJN01m 96 2 0
RNF2 NNN02m 96 56 0
RNF2 NNN06m 96 16 0
RNF2 NNN02m 97 45 0
RNF2 NNN06m 97 16 0
RNF2 NNN02m 98 40 0
RNF2 NNN06m 98 20 0
RNF2 JJN01m 99 1 0
RNF2 NNN02m 99 40 0
RNF2 NNN06m 99 11 0
RNF2 NNN02m 100 39 0
RNF2 NNN06m 100 7 0
RNF2 NNN02m 101 18 0
RNF2 NNN06m 101 8 0
RNF2 NNN02m 102 26 0
RNF2 NNN06m 102 6 0
RNF2 NNN02m 103 21 0
RNF2 NNN06m 103 8 0
RNF2 JJN01m 104 1 0
RNF2 NNN02m 104 11 0
RNF2 NNN06m 104 3 0
RNF2 NNN02m 105 19 0
RNF2 NNN06m 105 4 0
RNF2 NNN02m 106 10 0
RNF2 NNN06m 106 4 0
RNF2 NNN02m 107 13 0
RNF2 NNN06m 107 1 0
RNF2 NNN02m 108 12 0
RNF2 NNN06m 108 2 0
RNF2 NNN02m 109 4 0
RNF2 NNN06m 109 1 0
RNF2 JJN01m 110 1 0
RNF2 NNN02m 110 6 0
RNF2 NNN06m 110 3 0
RNF2 NNN02m 111 8 0
RNF2 NNN02m 112 4 0
RNF2 NNN06m 112 1 0
RNF2 NNN02m 113 4 0
RNF2 NNN06m 113 1 0
RNF2 NNN02m 114 3 0
RNF2 NNN02m 115 1 0
RNF2 NNN06m 115 1 0
RNF2 NNN02m 117 1 0
RNF2 NNN06m 118 1 0
RNF2 NNN02m 123 1 0
RNF2 NNN06m 123 1 0
RNF2 NNN02m 124 2 0
RNF2 NNN02m 129 1 0

Comparison to Pipeline Results

Investigate the relationship between the pre-cleaved barcode read counts and the pipeline results of the post-cleaved samples.

  • Anything within the CI range, treat it equally.
  • Anything outside of it, if they are in the analysis output, pull those (above & below), how do read counts look?
  • What is the expected average read count and on-target read count for the sample and range for the stdev.

SESSION INFORMATION

System Information

## print platform details (R version, OS, locale, time zone, date) of the
## session that rendered this report
sessioninfo::platform_info()
##  setting  value                       
##  version  R version 4.0.1 (2020-06-06)
##  os       macOS  10.16                
##  system   x86_64, darwin17.0          
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2022-01-07

Package Versions

## table of the packages attached in this session.
## `filter(attached = TRUE)` passed a named constant instead of a
## condition, so every loaded package was listed; filtering on the logical
## `attached` column keeps only the attached ones.
sessioninfo::package_info() %>% 
    filter(attached) %>% 
    select(package, loadedversion, date, source) %>%
    knitr::kable(booktabs = TRUE, row.names = FALSE)
package loadedversion date source
assertthat 0.2.1 2019-03-21 CRAN (R 4.0.0)
backports 1.2.1 2020-12-09 CRAN (R 4.0.2)
bookdown 0.21 2020-10-13 CRAN (R 4.0.1)
broom 0.7.6 2021-04-05 CRAN (R 4.0.2)
Cairo 1.5-12.2 2020-07-07 CRAN (R 4.0.2)
cellranger 1.1.0 2016-07-27 CRAN (R 4.0.0)
cli 2.5.0 2021-04-26 CRAN (R 4.0.2)
colorspace 2.0-0 2020-11-11 CRAN (R 4.0.2)
crayon 1.4.1 2021-02-08 CRAN (R 4.0.2)
crosstalk 1.1.0.1 2020-03-13 CRAN (R 4.0.2)
data.table 1.13.6 2020-12-30 CRAN (R 4.0.2)
DBI 1.1.0 2019-12-15 CRAN (R 4.0.0)
dbplyr 2.1.1 2021-04-06 CRAN (R 4.0.2)
digest 0.6.27 2020-10-24 CRAN (R 4.0.2)
dplyr 1.0.6 2021-05-05 CRAN (R 4.0.2)
ellipsis 0.3.2 2021-04-29 CRAN (R 4.0.2)
evaluate 0.14 2019-05-28 CRAN (R 4.0.0)
fansi 0.4.1 2020-01-08 CRAN (R 4.0.0)
forcats 0.5.1 2021-01-27 CRAN (R 4.0.2)
fs 1.5.0 2020-07-31 CRAN (R 4.0.2)
generics 0.1.0 2020-10-31 CRAN (R 4.0.2)
ggplot2 3.3.3 2020-12-30 CRAN (R 4.0.2)
glue 1.4.2 2020-08-27 CRAN (R 4.0.2)
gtable 0.3.0 2019-03-25 CRAN (R 4.0.0)
haven 2.3.1 2020-06-01 CRAN (R 4.0.0)
here 1.0.1 2020-12-13 CRAN (R 4.0.1)
highr 0.8 2019-03-20 CRAN (R 4.0.0)
hms 1.1.0 2021-05-17 CRAN (R 4.0.2)
htmltools 0.5.1.1 2021-01-22 CRAN (R 4.0.2)
htmlwidgets 1.5.3 2020-12-10 CRAN (R 4.0.2)
httr 1.4.2 2020-07-20 CRAN (R 4.0.2)
jsonlite 1.7.2 2020-12-09 CRAN (R 4.0.2)
kableExtra 1.3.1 2020-10-22 CRAN (R 4.0.2)
knitr 1.31 2021-01-27 CRAN (R 4.0.2)
labeling 0.4.2 2020-10-20 CRAN (R 4.0.2)
lazyeval 0.2.2 2019-03-15 CRAN (R 4.0.0)
lifecycle 1.0.0 2021-02-15 CRAN (R 4.0.2)
lubridate 1.7.10 2021-02-26 CRAN (R 4.0.2)
magrittr 2.0.1 2020-11-17 CRAN (R 4.0.2)
modelr 0.1.8 2020-05-19 CRAN (R 4.0.0)
munsell 0.5.0 2018-06-12 CRAN (R 4.0.0)
pillar 1.6.1 2021-05-16 CRAN (R 4.0.2)
pkgconfig 2.0.3 2019-09-22 CRAN (R 4.0.0)
plotly 4.9.2.2 2020-12-19 CRAN (R 4.0.2)
purrr 0.3.4 2020-04-17 CRAN (R 4.0.0)
R6 2.5.0 2020-10-28 CRAN (R 4.0.2)
Rcpp 1.0.5 2020-07-06 CRAN (R 4.0.1)
readr 1.4.0 2020-10-05 CRAN (R 4.0.2)
readxl 1.3.1 2019-03-13 CRAN (R 4.0.0)
reprex 2.0.0 2021-04-02 CRAN (R 4.0.2)
rlang 0.4.10 2020-12-30 CRAN (R 4.0.2)
rmarkdown 2.7.4 2021-03-24 Github ()
rmdformats 1.0.2 2021-04-19 CRAN (R 4.0.2)
rprojroot 2.0.2 2020-11-15 CRAN (R 4.0.2)
rstudioapi 0.13 2020-11-12 CRAN (R 4.0.2)
rvest 1.0.0 2021-03-09 CRAN (R 4.0.2)
scales 1.1.1 2020-05-11 CRAN (R 4.0.0)
sessioninfo 1.1.1 2018-11-05 CRAN (R 4.0.2)
stringi 1.5.3 2020-09-09 CRAN (R 4.0.2)
stringr 1.4.0 2019-02-10 CRAN (R 4.0.0)
tibble 3.1.2 2021-05-16 CRAN (R 4.0.2)
tidyr 1.1.3 2021-03-03 CRAN (R 4.0.2)
tidyselect 1.1.0 2020-05-11 CRAN (R 4.0.0)
tidyverse 1.3.1 2021-04-15 CRAN (R 4.0.2)
utf8 1.1.4 2018-05-24 CRAN (R 4.0.0)
vctrs 0.3.8 2021-04-29 CRAN (R 4.0.2)
viridisLite 0.3.0 2018-02-01 CRAN (R 4.0.0)
webshot 0.5.2 2019-11-22 CRAN (R 4.0.0)
withr 2.3.0 2020-09-22 CRAN (R 4.0.2)
xfun 0.22 2021-03-11 CRAN (R 4.0.2)
xml2 1.3.2 2020-04-23 CRAN (R 4.0.0)
yaml 2.2.1 2020-02-01 CRAN (R 4.0.0)